# Set the working directory to the project root; the relative "./datasets/..."
# paths passed to read.csv() further down are resolved against it.
# NOTE(review): this is a machine-specific absolute path, so the script only
# runs unchanged where this directory exists — consider starting R from the
# project root (e.g. an RStudio project) instead of calling setwd().
setwd("/Users/dreamer/Downloads/Godaddy/godaddy_microbusiness_forecasting")
# Importing the libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# ggplot2, purrr, tibble, dplyr, tidyr, stringr, readr, forcats
library(mice)
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(maps)
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
#library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
#library(dplyr)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(gbm)
## Loaded gbm 2.1.8.1
#library(png)
#library(ggmap)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:maps':
##
## unemp
library(mapdata)
library(corrplot)
## corrplot 0.92 loaded
# Code starts here
Explore the datasets to get a better understanding of the data.
Load the train and test datasets into dataframes.
# Read the three project CSV files from ./datasets (relative to the working
# directory) into base data frames: train.csv, test.csv, census_starter.csv.
train_df <- read.csv(file = "./datasets/train.csv")
test_df <- read.csv(file = "./datasets/test.csv")
census_df <- read.csv(file = "./datasets/census_starter.csv")
Check the dataframes
After reading the CSV files into dataframes, we should check whether the data was loaded correctly. We can use R's head() function to display the first few rows of a dataframe and tail() to display the last few rows; the calls below show the first and last 10 rows of the train, test and census dataframes. We can also use other base R functions such as str() and summary() to get more information about the dataframes, such as column names, data types, and summary statistics.
# Display the first 10 rows of the dataframes
head(train_df, n = 10)
## row_id cfips county state first_day_of_month
## 1 1001_2019-08-01 1001 Autauga County Alabama 2019-08-01
## 2 1001_2019-09-01 1001 Autauga County Alabama 2019-09-01
## 3 1001_2019-10-01 1001 Autauga County Alabama 2019-10-01
## 4 1001_2019-11-01 1001 Autauga County Alabama 2019-11-01
## 5 1001_2019-12-01 1001 Autauga County Alabama 2019-12-01
## 6 1001_2020-01-01 1001 Autauga County Alabama 2020-01-01
## 7 1001_2020-02-01 1001 Autauga County Alabama 2020-02-01
## 8 1001_2020-03-01 1001 Autauga County Alabama 2020-03-01
## 9 1001_2020-04-01 1001 Autauga County Alabama 2020-04-01
## 10 1001_2020-05-01 1001 Autauga County Alabama 2020-05-01
## microbusiness_density active
## 1 3.007682 1249
## 2 2.884870 1198
## 3 3.055843 1269
## 4 2.993233 1243
## 5 2.993233 1243
## 6 2.969090 1242
## 7 2.909326 1217
## 8 2.933231 1227
## 9 3.000167 1255
## 10 3.004948 1257
head(test_df, n = 10)
## row_id cfips first_day_of_month
## 1 1001_2022-11-01 1001 2022-11-01
## 2 1003_2022-11-01 1003 2022-11-01
## 3 1005_2022-11-01 1005 2022-11-01
## 4 1007_2022-11-01 1007 2022-11-01
## 5 1009_2022-11-01 1009 2022-11-01
## 6 1011_2022-11-01 1011 2022-11-01
## 7 1013_2022-11-01 1013 2022-11-01
## 8 1015_2022-11-01 1015 2022-11-01
## 9 1017_2022-11-01 1017 2022-11-01
## 10 1019_2022-11-01 1019 2022-11-01
head(census_df, n = 10)
## pct_bb_2017 pct_bb_2018 pct_bb_2019 pct_bb_2020 pct_bb_2021 cfips
## 1 76.6 78.9 80.6 82.7 85.5 1001
## 2 74.5 78.1 81.8 85.1 87.9 1003
## 3 57.2 60.4 60.5 64.6 64.6 1005
## 4 62.0 66.1 69.2 76.1 74.6 1007
## 5 65.8 68.5 73.0 79.6 81.0 1009
## 6 49.4 58.9 60.1 60.6 59.4 1011
## 7 58.2 62.1 64.6 73.6 76.3 1013
## 8 71.0 73.0 75.1 79.8 81.6 1015
## 9 62.8 66.5 69.4 74.5 77.1 1017
## 10 67.5 68.6 70.7 75.0 76.7 1019
## pct_college_2017 pct_college_2018 pct_college_2019 pct_college_2020
## 1 14.5 15.9 16.1 16.7
## 2 20.4 20.7 21.0 20.2
## 3 7.6 7.8 7.6 7.3
## 4 8.1 7.6 6.5 7.4
## 5 8.7 8.1 8.6 8.9
## 6 6.6 7.4 7.4 6.1
## 7 9.6 9.7 9.7 10.1
## 8 10.2 10.2 10.5 10.5
## 9 9.0 9.3 9.5 10.5
## 10 6.6 6.8 6.6 6.3
## pct_college_2021 pct_foreign_born_2017 pct_foreign_born_2018
## 1 16.4 2.1 2.0
## 2 20.6 3.2 3.4
## 3 6.7 2.7 2.5
## 4 7.9 1.0 1.4
## 5 9.3 4.5 4.4
## 6 8.1 1.8 0.9
## 7 8.1 1.0 1.4
## 8 11.4 2.6 2.7
## 9 9.6 1.3 1.4
## 10 6.2 0.7 0.8
## pct_foreign_born_2019 pct_foreign_born_2020 pct_foreign_born_2021
## 1 2.3 2.3 2.1
## 2 3.7 3.4 3.5
## 3 2.7 2.6 2.6
## 4 1.5 1.6 1.1
## 5 4.5 4.4 4.5
## 6 0.7 1.5 1.2
## 7 0.8 1.9 1.7
## 8 2.7 2.5 2.5
## 9 1.8 1.9 2.0
## 10 0.9 1.9 2.0
## pct_it_workers_2017 pct_it_workers_2018 pct_it_workers_2019
## 1 1.3 1.1 0.7
## 2 1.4 1.3 1.4
## 3 0.5 0.3 0.8
## 4 1.2 1.4 1.6
## 5 1.3 1.4 0.9
## 6 0.4 0.3 0.5
## 7 1.1 1.4 1.7
## 8 1.4 1.4 1.2
## 9 2.4 2.1 2.1
## 10 1.4 1.3 1.2
## pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2017
## 1 0.6 1.1 55317
## 2 1.0 1.3 52562
## 3 1.1 0.8 33368
## 4 1.7 2.1 43404
## 5 1.1 0.9 47412
## 6 0.3 0.2 29655
## 7 1.3 1.4 36326
## 8 1.0 1.0 43686
## 9 2.3 1.8 37342
## 10 0.9 0.4 40041
## median_hh_inc_2018 median_hh_inc_2019 median_hh_inc_2020 median_hh_inc_2021
## 1 58786 58731 57982 62660
## 2 55962 58320 61756 64346
## 3 34186 32525 34990 36422
## 4 45340 47542 51721 54277
## 5 48695 49358 48922 52830
## 6 32152 37785 33866 29063
## 7 39109 40688 44850 45236
## 8 45197 47255 50128 50977
## 9 39872 42289 43875 47232
## 10 41014 41919 42509 43475
# Display the last 10 rows of the dataframes
tail(train_df, n = 10)
## row_id cfips county state first_day_of_month
## 122256 56045_2022-01-01 56045 Weston County Wyoming 2022-01-01
## 122257 56045_2022-02-01 56045 Weston County Wyoming 2022-02-01
## 122258 56045_2022-03-01 56045 Weston County Wyoming 2022-03-01
## 122259 56045_2022-04-01 56045 Weston County Wyoming 2022-04-01
## 122260 56045_2022-05-01 56045 Weston County Wyoming 2022-05-01
## 122261 56045_2022-06-01 56045 Weston County Wyoming 2022-06-01
## 122262 56045_2022-07-01 56045 Weston County Wyoming 2022-07-01
## 122263 56045_2022-08-01 56045 Weston County Wyoming 2022-08-01
## 122264 56045_2022-09-01 56045 Weston County Wyoming 2022-09-01
## 122265 56045_2022-10-01 56045 Weston County Wyoming 2022-10-01
## microbusiness_density active
## 122256 1.749688 98
## 122257 1.749688 98
## 122258 1.767542 99
## 122259 1.767542 99
## 122260 1.803249 101
## 122261 1.803249 101
## 122262 1.803249 101
## 122263 1.785395 100
## 122264 1.785395 100
## 122265 1.785395 100
tail(test_df, n = 10)
## row_id cfips first_day_of_month
## 25071 56027_2023-06-01 56027 2023-06-01
## 25072 56029_2023-06-01 56029 2023-06-01
## 25073 56031_2023-06-01 56031 2023-06-01
## 25074 56033_2023-06-01 56033 2023-06-01
## 25075 56035_2023-06-01 56035 2023-06-01
## 25076 56037_2023-06-01 56037 2023-06-01
## 25077 56039_2023-06-01 56039 2023-06-01
## 25078 56041_2023-06-01 56041 2023-06-01
## 25079 56043_2023-06-01 56043 2023-06-01
## 25080 56045_2023-06-01 56045 2023-06-01
tail(census_df, n = 10)
## pct_bb_2017 pct_bb_2018 pct_bb_2019 pct_bb_2020 pct_bb_2021 cfips
## 3133 67.8 73.3 72.2 77.6 74.3 56027
## 3134 80.4 82.2 85.7 86.7 87.9 56029
## 3135 68.4 69.2 70.9 74.5 75.1 56031
## 3136 80.9 81.1 82.9 82.8 84.7 56033
## 3137 82.9 81.7 85.6 88.1 89.8 56035
## 3138 82.2 82.4 84.0 86.7 88.4 56037
## 3139 83.5 85.9 87.1 89.1 90.5 56039
## 3140 83.8 88.2 89.5 91.4 90.6 56041
## 3141 76.4 78.3 78.2 82.8 85.4 56043
## 3142 71.1 73.3 76.8 79.7 81.3 56045
## pct_college_2017 pct_college_2018 pct_college_2019 pct_college_2020
## 3133 14.2 13.0 14.5 14.7
## 3134 19.2 19.1 21.5 21.8
## 3135 14.6 14.1 10.9 10.8
## 3136 18.3 16.2 16.5 16.8
## 3137 19.2 19.0 16.7 21.7
## 3138 15.3 15.2 14.8 13.7
## 3139 37.7 37.8 38.9 37.2
## 3140 11.9 10.5 11.1 12.6
## 3141 15.4 15.0 15.4 15.0
## 3142 14.1 13.5 13.4 12.7
## pct_college_2021 pct_foreign_born_2017 pct_foreign_born_2018
## 3133 17.1 0.3 0.2
## 3134 22.0 2.4 2.5
## 3135 10.6 3.0 2.8
## 3136 18.0 2.3 2.1
## 3137 20.9 3.9 3.1
## 3138 12.4 5.0 5.3
## 3139 38.3 10.8 11.2
## 3140 12.3 2.9 3.1
## 3141 17.2 2.3 1.4
## 3142 13.9 3.8 4.1
## pct_foreign_born_2019 pct_foreign_born_2020 pct_foreign_born_2021
## 3133 0.3 0.0 0.0
## 3134 2.5 2.4 2.5
## 3135 3.0 2.6 2.0
## 3136 2.0 2.5 2.2
## 3137 4.4 5.1 5.1
## 3138 4.7 5.2 5.5
## 3139 11.8 11.4 11.1
## 3140 2.9 2.9 2.9
## 3141 1.6 2.2 1.0
## 3142 1.7 2.3 1.6
## pct_it_workers_2017 pct_it_workers_2018 pct_it_workers_2019
## 3133 1.2 1.0 0.9
## 3134 1.7 2.1 2.6
## 3135 0.0 0.0 0.0
## 3136 1.6 1.7 1.5
## 3137 0.1 0.0 0.0
## 3138 0.6 0.6 1.0
## 3139 0.7 1.2 1.4
## 3140 1.2 1.2 1.4
## 3141 1.3 1.0 0.9
## 3142 0.6 0.6 0.0
## pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2017
## 3133 0.0 1.9 36793
## 3134 1.9 1.7 60828
## 3135 0.0 0.0 47380
## 3136 1.8 1.3 56455
## 3137 0.0 0.0 84911
## 3138 0.9 1.0 71083
## 3139 1.5 2.0 80049
## 3140 1.7 0.9 54672
## 3141 0.9 1.1 51362
## 3142 0.0 0.0 59605
## median_hh_inc_2018 median_hh_inc_2019 median_hh_inc_2020
## 3133 39359 39150 46111
## 3134 62666 63582 63684
## 3135 47096 50903 57784
## 3136 58521 60807 59380
## 3137 78680 77403 78655
## 3138 73008 74843 73384
## 3139 83831 84678 87053
## 3140 58235 63403 72458
## 3141 53426 54158 57306
## 3142 52867 57031 53333
## median_hh_inc_2021
## 3133 48688
## 3134 62586
## 3135 59402
## 3136 62531
## 3137 82342
## 3138 76668
## 3139 94498
## 3140 75106
## 3141 62271
## 3142 65566
# Display information about the train dataframe
str(train_df)
## 'data.frame': 122265 obs. of 7 variables:
## $ row_id : chr "1001_2019-08-01" "1001_2019-09-01" "1001_2019-10-01" "1001_2019-11-01" ...
## $ cfips : int 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ county : chr "Autauga County" "Autauga County" "Autauga County" "Autauga County" ...
## $ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ first_day_of_month : chr "2019-08-01" "2019-09-01" "2019-10-01" "2019-11-01" ...
## $ microbusiness_density: num 3.01 2.88 3.06 2.99 2.99 ...
## $ active : int 1249 1198 1269 1243 1243 1242 1217 1227 1255 1257 ...
# strrep() with sep = "" prints one unbroken rule; cat(rep("=", 40)) would
# insert a space between every sign.
cat(strrep("=", 40), "\n", sep = "") # Print a line of 40 equal signs
## ========================================
summary(train_df)
## row_id cfips county state
## Length:122265 Min. : 1001 Length:122265 Length:122265
## Class :character 1st Qu.:18177 Class :character Class :character
## Mode :character Median :29173 Mode :character Mode :character
## Mean :30376
## 3rd Qu.:45077
## Max. :56045
## first_day_of_month microbusiness_density active
## Length:122265 Min. : 0.000 Min. : 0
## Class :character 1st Qu.: 1.639 1st Qu.: 145
## Mode :character Median : 2.587 Median : 488
## Mean : 3.818 Mean : 6443
## 3rd Qu.: 4.519 3rd Qu.: 2124
## Max. :284.340 Max. :1167744
# Display information about the test dataframe
str(test_df)
## 'data.frame': 25080 obs. of 3 variables:
## $ row_id : chr "1001_2022-11-01" "1003_2022-11-01" "1005_2022-11-01" "1007_2022-11-01" ...
## $ cfips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ first_day_of_month: chr "2022-11-01" "2022-11-01" "2022-11-01" "2022-11-01" ...
# strrep() with sep = "" prints one unbroken rule; cat(rep("=", 40)) would
# insert a space between every sign.
cat(strrep("=", 40), "\n", sep = "") # Print a line of 40 equal signs
## ========================================
summary(test_df)
## row_id cfips first_day_of_month
## Length:25080 Min. : 1001 Length:25080
## Class :character 1st Qu.:18177 Class :character
## Mode :character Median :29173 Mode :character
## Mean :30376
## 3rd Qu.:45077
## Max. :56045
# Display information about the census dataframe
str(census_df)
## 'data.frame': 3142 obs. of 26 variables:
## $ pct_bb_2017 : num 76.6 74.5 57.2 62 65.8 49.4 58.2 71 62.8 67.5 ...
## $ pct_bb_2018 : num 78.9 78.1 60.4 66.1 68.5 58.9 62.1 73 66.5 68.6 ...
## $ pct_bb_2019 : num 80.6 81.8 60.5 69.2 73 60.1 64.6 75.1 69.4 70.7 ...
## $ pct_bb_2020 : num 82.7 85.1 64.6 76.1 79.6 60.6 73.6 79.8 74.5 75 ...
## $ pct_bb_2021 : num 85.5 87.9 64.6 74.6 81 59.4 76.3 81.6 77.1 76.7 ...
## $ cfips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ pct_college_2017 : num 14.5 20.4 7.6 8.1 8.7 6.6 9.6 10.2 9 6.6 ...
## $ pct_college_2018 : num 15.9 20.7 7.8 7.6 8.1 7.4 9.7 10.2 9.3 6.8 ...
## $ pct_college_2019 : num 16.1 21 7.6 6.5 8.6 7.4 9.7 10.5 9.5 6.6 ...
## $ pct_college_2020 : num 16.7 20.2 7.3 7.4 8.9 6.1 10.1 10.5 10.5 6.3 ...
## $ pct_college_2021 : num 16.4 20.6 6.7 7.9 9.3 8.1 8.1 11.4 9.6 6.2 ...
## $ pct_foreign_born_2017: num 2.1 3.2 2.7 1 4.5 1.8 1 2.6 1.3 0.7 ...
## $ pct_foreign_born_2018: num 2 3.4 2.5 1.4 4.4 0.9 1.4 2.7 1.4 0.8 ...
## $ pct_foreign_born_2019: num 2.3 3.7 2.7 1.5 4.5 0.7 0.8 2.7 1.8 0.9 ...
## $ pct_foreign_born_2020: num 2.3 3.4 2.6 1.6 4.4 1.5 1.9 2.5 1.9 1.9 ...
## $ pct_foreign_born_2021: num 2.1 3.5 2.6 1.1 4.5 1.2 1.7 2.5 2 2 ...
## $ pct_it_workers_2017 : num 1.3 1.4 0.5 1.2 1.3 0.4 1.1 1.4 2.4 1.4 ...
## $ pct_it_workers_2018 : num 1.1 1.3 0.3 1.4 1.4 0.3 1.4 1.4 2.1 1.3 ...
## $ pct_it_workers_2019 : num 0.7 1.4 0.8 1.6 0.9 0.5 1.7 1.2 2.1 1.2 ...
## $ pct_it_workers_2020 : num 0.6 1 1.1 1.7 1.1 0.3 1.3 1 2.3 0.9 ...
## $ pct_it_workers_2021 : num 1.1 1.3 0.8 2.1 0.9 0.2 1.4 1 1.8 0.4 ...
## $ median_hh_inc_2017 : int 55317 52562 33368 43404 47412 29655 36326 43686 37342 40041 ...
## $ median_hh_inc_2018 : num 58786 55962 34186 45340 48695 ...
## $ median_hh_inc_2019 : int 58731 58320 32525 47542 49358 37785 40688 47255 42289 41919 ...
## $ median_hh_inc_2020 : num 57982 61756 34990 51721 48922 ...
## $ median_hh_inc_2021 : num 62660 64346 36422 54277 52830 ...
# strrep() with sep = "" prints one unbroken rule; cat(rep("=", 40)) would
# insert a space between every sign.
cat(strrep("=", 40), "\n", sep = "") # Print a line of 40 equal signs
## ========================================
summary(census_df)
## pct_bb_2017 pct_bb_2018 pct_bb_2019 pct_bb_2020
## Min. :24.50 Min. :25.70 Min. :34.80 Min. :33.30
## 1st Qu.:64.20 1st Qu.:67.42 1st Qu.:70.50 1st Qu.:74.10
## Median :70.70 Median :73.60 Median :76.45 Median :79.60
## Mean :69.92 Mean :72.69 Mean :75.40 Mean :78.54
## 3rd Qu.:76.40 3rd Qu.:78.80 3rd Qu.:81.40 3rd Qu.:84.10
## Max. :94.60 Max. :95.50 Max. :96.00 Max. :97.10
## NA's :1
## pct_bb_2021 cfips pct_college_2017 pct_college_2018
## Min. :37.00 Min. : 1001 Min. : 2.40 Min. : 0.00
## 1st Qu.:76.40 1st Qu.:18178 1st Qu.: 9.70 1st Qu.: 9.90
## Median :81.70 Median :29176 Median :12.80 Median :13.00
## Mean :80.54 Mean :30384 Mean :13.81 Mean :14.01
## 3rd Qu.:85.90 3rd Qu.:45080 3rd Qu.:16.80 3rd Qu.:17.10
## Max. :97.60 Max. :56045 Max. :43.70 Max. :48.00
## NA's :1
## pct_college_2019 pct_college_2020 pct_college_2021 pct_foreign_born_2017
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:10.10 1st Qu.:10.50 1st Qu.:10.60 1st Qu.: 1.400
## Median :13.25 Median :13.60 Median :13.80 Median : 2.700
## Mean :14.24 Mean :14.63 Mean :14.85 Mean : 4.702
## 3rd Qu.:17.30 3rd Qu.:17.90 3rd Qu.:18.00 3rd Qu.: 5.700
## Max. :45.40 Max. :43.00 Max. :43.70 Max. :52.900
## NA's :1 NA's :1
## pct_foreign_born_2018 pct_foreign_born_2019 pct_foreign_born_2020
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.400 1st Qu.: 1.400 1st Qu.: 1.400
## Median : 2.700 Median : 2.700 Median : 2.800
## Mean : 4.725 Mean : 4.769 Mean : 4.749
## 3rd Qu.: 5.700 3rd Qu.: 5.700 3rd Qu.: 5.700
## Max. :53.300 Max. :53.700 Max. :54.000
## NA's :1
## pct_foreign_born_2021 pct_it_workers_2017 pct_it_workers_2018
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.400 1st Qu.: 0.800 1st Qu.: 0.800
## Median : 2.700 Median : 1.300 Median : 1.300
## Mean : 4.744 Mean : 1.427 Mean : 1.382
## 3rd Qu.: 5.700 3rd Qu.: 1.900 3rd Qu.: 1.800
## Max. :54.000 Max. :17.400 Max. :11.700
## NA's :1 NA's :1
## pct_it_workers_2019 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2017
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 19264
## 1st Qu.: 0.700 1st Qu.: 0.700 1st Qu.: 0.600 1st Qu.: 41123
## Median : 1.200 Median : 1.200 Median : 1.100 Median : 48066
## Mean : 1.339 Mean : 1.309 Mean : 1.273 Mean : 49754
## 3rd Qu.: 1.800 3rd Qu.: 1.800 3rd Qu.: 1.700 3rd Qu.: 55764
## Max. :10.500 Max. :15.200 Max. :15.200 Max. :129588
## NA's :1 NA's :1
## median_hh_inc_2018 median_hh_inc_2019 median_hh_inc_2020 median_hh_inc_2021
## Min. : 20188 Min. : 21504 Min. : 22292 Min. : 17109
## 1st Qu.: 42480 1st Qu.: 44155 1st Qu.: 45653 1st Qu.: 48180
## Median : 49888 Median : 51758 Median : 52842 Median : 55907
## Mean : 51583 Mean : 53476 Mean : 55012 Mean : 58223
## 3rd Qu.: 57611 3rd Qu.: 59867 3rd Qu.: 61501 3rd Qu.: 64930
## Max. :136268 Max. :142299 Max. :147111 Max. :156821
## NA's :1 NA's :2 NA's :2
The is.na() function is used to create a logical matrix where TRUE represents a missing value and FALSE represents a non-missing value. The colSums() function is then used to count the number of missing values in each column of the data frame. If the sum of a column is greater than 0, it means that there is at least one missing value in that column.
# Check for missing values in the train data frame
colSums(is.na(train_df))
## row_id cfips county
## 0 0 0
## state first_day_of_month microbusiness_density
## 0 0 0
## active
## 0
#{r fig.width=7, fig.align='center', fig.height=4, out.width='100%'}
# Count the missing values in every census_df column and express each count
# as a percentage of the number of rows. The result has one row per column:
# variable (column name), missing_count and missing_percent.
# across()/pivot_longer() replace the superseded summarise_all()/gather().
missing_data <- census_df %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "missing_count"
  ) %>%
  mutate(missing_percent = missing_count / nrow(census_df) * 100)

# Build one horizontal bar chart per metric. geom_col() uses the values
# already stored in the data as bar lengths (same as
# geom_bar(stat = "identity")).
plot1 <- ggplot(missing_data, aes(x = missing_count, y = variable)) +
  geom_col(fill = "steelblue") +
  labs(x = "Number of missing values", y = "") +
  ggtitle("Number of missing values in census_df")
plot2 <- ggplot(missing_data, aes(x = missing_percent, y = variable)) +
  geom_col(fill = "steelblue") +
  labs(x = "Percentage of missing values", y = "") +
  ggtitle("Percentage of missing values in census_df")

# Arrange the two plots side by side
grid.arrange(plot1, plot2, ncol = 2)
# Check for missing values in the test data frame
colSums(is.na(test_df))
## row_id cfips first_day_of_month
## 0 0 0
# Check for missing values in the census data frame
colSums(is.na(census_df))
## pct_bb_2017 pct_bb_2018 pct_bb_2019
## 0 0 0
## pct_bb_2020 pct_bb_2021 cfips
## 1 1 0
## pct_college_2017 pct_college_2018 pct_college_2019
## 0 0 0
## pct_college_2020 pct_college_2021 pct_foreign_born_2017
## 1 1 0
## pct_foreign_born_2018 pct_foreign_born_2019 pct_foreign_born_2020
## 0 0 1
## pct_foreign_born_2021 pct_it_workers_2017 pct_it_workers_2018
## 1 0 1
## pct_it_workers_2019 pct_it_workers_2020 pct_it_workers_2021
## 0 1 1
## median_hh_inc_2017 median_hh_inc_2018 median_hh_inc_2019
## 0 1 0
## median_hh_inc_2020 median_hh_inc_2021
## 2 2
We use the complete.cases() function to determine which rows have complete data and which rows have missing values. The complete.cases() function returns a logical vector indicating which rows have no missing values. Therefore, to identify the rows with missing values, we use the ! operator to negate the logical vector returned by complete.cases(). Then, we use the is.na() function to identify which columns have missing values for each missing row:
# Indices of the census_df rows that contain at least one NA
missing_rows <- which(!complete.cases(census_df))
# For each incomplete row, print its index followed by the names of the
# columns that are NA in that row
census_cols <- names(census_df)
for (row_idx in missing_rows) {
  na_mask <- is.na(census_df[row_idx, ])
  na_cols <- paste(census_cols[na_mask], collapse = ", ")
  cat("Row", row_idx, "has missing values in columns:", na_cols, "\n")
}
## Row 93 has missing values in columns: pct_bb_2020, pct_bb_2021, pct_college_2020, pct_college_2021, pct_foreign_born_2020, pct_foreign_born_2021, pct_it_workers_2020, pct_it_workers_2021, median_hh_inc_2020, median_hh_inc_2021
## Row 1817 has missing values in columns: pct_it_workers_2018, median_hh_inc_2018
## Row 2645 has missing values in columns: median_hh_inc_2020
## Row 2674 has missing values in columns: median_hh_inc_2021
print(census_df[missing_rows,])
## pct_bb_2017 pct_bb_2018 pct_bb_2019 pct_bb_2020 pct_bb_2021 cfips
## 93 80.5 79.1 80.4 NA NA 2261
## 1817 49.1 52.1 57.6 60.7 63.5 35039
## 2645 66.3 66.6 61.2 63.2 70.1 48243
## 2674 64.5 72.7 73.3 96.8 97.0 48301
## pct_college_2017 pct_college_2018 pct_college_2019 pct_college_2020
## 93 23.1 19.0 16.5 NA
## 1817 12.0 12.5 12.6 10.6
## 2645 18.4 16.0 10.8 14.3
## 2674 4.7 0.0 0.0 0.0
## pct_college_2021 pct_foreign_born_2017 pct_foreign_born_2018
## 93 NA 4.9 6.3
## 1817 10.1 4.5 3.7
## 2645 10.9 22.4 14.9
## 2674 0.0 10.8 15.7
## pct_foreign_born_2019 pct_foreign_born_2020 pct_foreign_born_2021
## 93 6.6 NA NA
## 1817 4.2 4.5 4.8
## 2645 20.9 10.1 12.7
## 2674 12.2 0.0 1.2
## pct_it_workers_2017 pct_it_workers_2018 pct_it_workers_2019
## 93 3.3 3.9 5.3
## 1817 0.8 NA 0.8
## 2645 0.0 0.0 0.0
## 2674 0.0 0.0 0.0
## pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2017
## 93 NA NA 86019
## 1817 0.4 0.7 33422
## 2645 0.0 0.0 46534
## 2674 0.0 0.0 80938
## median_hh_inc_2018 median_hh_inc_2019 median_hh_inc_2020
## 93 82306 79867 NA
## 1817 NA 39952 42264
## 2645 53194 53088 NA
## 2674 81875 83750 44076
## median_hh_inc_2021
## 93 NA
## 1817 46994
## 2645 38659
## 2674 NA
We use the mice package to impute missing values in the census_df dataframe.
m: the number of imputations to generate was set to 5 because, as a general rule, m should be at least 5 for good imputation performance. Creating too many imputed datasets increases the computational cost and may not necessarily lead to better results.
maxit: maxit was set to 50 to allow for a larger number of iterations to ensure that the imputation algorithm converges and fills in missing values as accurately as possible.
method: In this case, we are using “pmm” which stands for “Predictive Mean Matching”, because it is a flexible and widely used imputation method that works well with continuous variables. The method estimates the missing values by drawing from a set of observed values that have similar characteristics to the missing values.
# Impute missing data using mice: m = 5 imputed datasets, maxit = 50
# iterations, and predictive mean matching ("pmm") as the imputation method.
# The imputation draws random values, so a fixed seed is passed to make the
# result reproducible between runs; the seed value itself is arbitrary.
imputed_df <- mice(census_df, m = 5, maxit = 50, method = "pmm", seed = 123)
##
## iter imp variable
## 1 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 1 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 1 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 1 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 1 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 2 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 2 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 2 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 2 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 2 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 3 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 3 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 3 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 3 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 3 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 4 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 4 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 4 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 4 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 4 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 5 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 5 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 5 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 5 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 5 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 6 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 6 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 6 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 6 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 6 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 7 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 7 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 7 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 7 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 7 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 8 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 8 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 8 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 8 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 8 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 9 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 9 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 9 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 9 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 9 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 10 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 10 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 10 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 10 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 10 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 11 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 11 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 11 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 11 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 11 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 12 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 12 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 12 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 12 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 12 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 13 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 13 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 13 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 13 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 13 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 14 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 14 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 14 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 14 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 14 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 15 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 15 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 15 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 15 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 15 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 16 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 16 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 16 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 16 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 16 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 17 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 17 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 17 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 17 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 17 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 18 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 18 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 18 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 18 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 18 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 19 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 19 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 19 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 19 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 19 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 20 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 20 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 20 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 20 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 20 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 21 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 21 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 21 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 21 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 21 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 22 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 22 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 22 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 22 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 22 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 23 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 23 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 23 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 23 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 23 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 24 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 24 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 24 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 24 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 24 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 25 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 25 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 25 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 25 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 25 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 26 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 26 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 26 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 26 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 26 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 27 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 27 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 27 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 27 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 27 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 28 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 28 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 28 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 28 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 28 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 29 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 29 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 29 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 29 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 29 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 30 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 30 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 30 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 30 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 30 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 31 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 31 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 31 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 31 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 31 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 32 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 32 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 32 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 32 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 32 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 33 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 33 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 33 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 33 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 33 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 34 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 34 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 34 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 34 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 34 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 35 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 35 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 35 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 35 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 35 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 36 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 36 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 36 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 36 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 36 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 37 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 37 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 37 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 37 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 37 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 38 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 38 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 38 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 38 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 38 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 39 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 39 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 39 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 39 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 39 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 40 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 40 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 40 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 40 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 40 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 41 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 41 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 41 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 41 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 41 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 42 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 42 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 42 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 42 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 42 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 43 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 43 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 43 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 43 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 43 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 44 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 44 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 44 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 44 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 44 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 45 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 45 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 45 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 45 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 45 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 46 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 46 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 46 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 46 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 46 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 47 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 47 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 47 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 47 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 47 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 48 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 48 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 48 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 48 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 48 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 49 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 49 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 49 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 49 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 49 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 50 1 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 50 2 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 50 3 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 50 4 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## 50 5 pct_bb_2020 pct_bb_2021 pct_college_2020 pct_college_2021 pct_foreign_born_2020 pct_foreign_born_2021 pct_it_workers_2018 pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2018 median_hh_inc_2020 median_hh_inc_2021
## Warning: Number of logged events: 500
# Pull a completed (imputed) dataset out of the imputation result
imputed_data <- complete(imputed_df)
# Count the NAs left in each column of the completed data
imputed_data %>%
  is.na() %>%
  colSums()
## pct_bb_2017 pct_bb_2018 pct_bb_2019
## 0 0 0
## pct_bb_2020 pct_bb_2021 cfips
## 0 0 0
## pct_college_2017 pct_college_2018 pct_college_2019
## 0 0 0
## pct_college_2020 pct_college_2021 pct_foreign_born_2017
## 0 0 0
## pct_foreign_born_2018 pct_foreign_born_2019 pct_foreign_born_2020
## 0 0 0
## pct_foreign_born_2021 pct_it_workers_2017 pct_it_workers_2018
## 0 0 0
## pct_it_workers_2019 pct_it_workers_2020 pct_it_workers_2021
## 0 0 0
## median_hh_inc_2017 median_hh_inc_2018 median_hh_inc_2019
## 0 0 0
## median_hh_inc_2020 median_hh_inc_2021
## 0 0
# Show the completed values for the rows indexed by missing_rows
print(imputed_data[missing_rows, , drop = FALSE])
## pct_bb_2017 pct_bb_2018 pct_bb_2019 pct_bb_2020 pct_bb_2021 cfips
## 93 80.5 79.1 80.4 80.9 81.3 2261
## 1817 49.1 52.1 57.6 60.7 63.5 35039
## 2645 66.3 66.6 61.2 63.2 70.1 48243
## 2674 64.5 72.7 73.3 96.8 97.0 48301
## pct_college_2017 pct_college_2018 pct_college_2019 pct_college_2020
## 93 23.1 19.0 16.5 15.9
## 1817 12.0 12.5 12.6 10.6
## 2645 18.4 16.0 10.8 14.3
## 2674 4.7 0.0 0.0 0.0
## pct_college_2021 pct_foreign_born_2017 pct_foreign_born_2018
## 93 15.6 4.9 6.3
## 1817 10.1 4.5 3.7
## 2645 10.9 22.4 14.9
## 2674 0.0 10.8 15.7
## pct_foreign_born_2019 pct_foreign_born_2020 pct_foreign_born_2021
## 93 6.6 3.9 6.3
## 1817 4.2 4.5 4.8
## 2645 20.9 10.1 12.7
## 2674 12.2 0.0 1.2
## pct_it_workers_2017 pct_it_workers_2018 pct_it_workers_2019
## 93 3.3 3.9 5.3
## 1817 0.8 0.5 0.8
## 2645 0.0 0.0 0.0
## 2674 0.0 0.0 0.0
## pct_it_workers_2020 pct_it_workers_2021 median_hh_inc_2017
## 93 4.9 5.1 86019
## 1817 0.4 0.7 33422
## 2645 0.0 0.0 46534
## 2674 0.0 0.0 80938
## median_hh_inc_2018 median_hh_inc_2019 median_hh_inc_2020
## 93 82306 79867 71505
## 1817 33456 39952 42264
## 2645 53194 53088 42126
## 2674 81875 83750 44076
## median_hh_inc_2021
## 93 73892
## 1817 46994
## 2645 38659
## 2674 52956
# Distinct first_day_of_month values present in the training set
index <- unique(train_df[["first_day_of_month"]])
print(index)
## [1] "2019-08-01" "2019-09-01" "2019-10-01" "2019-11-01" "2019-12-01"
## [6] "2020-01-01" "2020-02-01" "2020-03-01" "2020-04-01" "2020-05-01"
## [11] "2020-06-01" "2020-07-01" "2020-08-01" "2020-09-01" "2020-10-01"
## [16] "2020-11-01" "2020-12-01" "2021-01-01" "2021-02-01" "2021-03-01"
## [21] "2021-04-01" "2021-05-01" "2021-06-01" "2021-07-01" "2021-08-01"
## [26] "2021-09-01" "2021-10-01" "2021-11-01" "2021-12-01" "2022-01-01"
## [31] "2022-02-01" "2022-03-01" "2022-04-01" "2022-05-01" "2022-06-01"
## [36] "2022-07-01" "2022-08-01" "2022-09-01" "2022-10-01"
The training data runs from 08/2019 to 10/2022.
# Distinct first_day_of_month values present in the test set
index <- unique(test_df[["first_day_of_month"]])
print(index)
## [1] "2022-11-01" "2022-12-01" "2023-01-01" "2023-02-01" "2023-03-01"
## [6] "2023-04-01" "2023-05-01" "2023-06-01"
The prediction dates run from 11/2022 to 06/2023.
To make the analysis easier and to be able to group the data by year and month, we’ll extract the year and month from the first_day_of_month column in both the train and test dataframes using substr(), and store them in three new columns in each dataframe: year (integer), month (integer), and year_month (character).
# first_day_of_month is a "YYYY-MM-DD" string: characters 1-4 are the year,
# 6-7 the month, and 1-7 the combined "YYYY-MM" key.
# Derive the three date-part columns for the training set
train_df <- train_df %>%
  mutate(
    year = as.integer(substr(first_day_of_month, 1, 4)),
    month = as.integer(substr(first_day_of_month, 6, 7)),
    year_month = substr(first_day_of_month, 1, 7)
  )
# Derive the same three columns for the test set
test_df <- test_df %>%
  mutate(
    year = as.integer(substr(first_day_of_month, 1, 4)),
    month = as.integer(substr(first_day_of_month, 6, 7)),
    year_month = substr(first_day_of_month, 1, 7)
  )
# Census indicators to attach to the monthly training rows
vars <- c("pct_bb", "pct_college", "pct_foreign_born", "pct_it_workers",
          "median_hh_inc")
# Start from train_df; each pass through the loop adds one census column
merged_df <- train_df
for (var in vars) {
  merged_df <- imputed_data %>%
    # Keep the county key plus this indicator's 2017-2020 columns
    select(cfips, all_of(paste0(var, "_", 2017:2020))) %>%
    # One row per county and census year
    pivot_longer(cols = starts_with(var),
                 names_to = "year",
                 values_to = var) %>%
    # The last four characters of the column name are the census year.
    # Adding 2 pairs the 2017-2020 columns with training years 2019-2022.
    mutate(year = as.integer(str_sub(year, -4)) + 2) %>%
    # Repeat every county-year row 12 times; .id numbers the copies 1-12,
    # which becomes the month key for the join
    uncount(12, .id = "month") %>%
    # all.x = TRUE also keeps county-months that have no training row; their
    # training columns are NA and are dropped after the loop
    merge(merged_df, by = c("cfips", "year", "month"), all.x = TRUE) %>%
    arrange(cfips, row_id)
}
merged_df <- merged_df %>%
  select(row_id, cfips, county, state, first_day_of_month,
         microbusiness_density, active, year_month, year, month,
         pct_bb, pct_college, pct_foreign_born, pct_it_workers,
         median_hh_inc) %>%
  # Drop rows containing NA, i.e. the unmatched county-months kept by
  # all.x = TRUE above
  na.omit()
# Confirm that no missing values remain
colSums(is.na(merged_df))
## row_id cfips county
## 0 0 0
## state first_day_of_month microbusiness_density
## 0 0 0
## active year_month year
## 0 0 0
## month pct_bb pct_college
## 0 0 0
## pct_foreign_born pct_it_workers median_hh_inc
## 0 0 0
# Inspect the structure of train_df after adding the date-part columns
str(train_df)
## 'data.frame': 122265 obs. of 10 variables:
## $ row_id : chr "1001_2019-08-01" "1001_2019-09-01" "1001_2019-10-01" "1001_2019-11-01" ...
## $ cfips : int 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ county : chr "Autauga County" "Autauga County" "Autauga County" "Autauga County" ...
## $ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ first_day_of_month : chr "2019-08-01" "2019-09-01" "2019-10-01" "2019-11-01" ...
## $ microbusiness_density: num 3.01 2.88 3.06 2.99 2.99 ...
## $ active : int 1249 1198 1269 1243 1243 1242 1217 1227 1255 1257 ...
## $ year : int 2019 2019 2019 2019 2019 2020 2020 2020 2020 2020 ...
## $ month : int 8 9 10 11 12 1 2 3 4 5 ...
## $ year_month : chr "2019-08" "2019-09" "2019-10" "2019-11" ...
# Print a row of 40 "=" characters as a visual separator
cat(rep("=", times = 40), "\n")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Inspect the structure of test_df after adding the date-part columns
str(test_df)
## 'data.frame': 25080 obs. of 6 variables:
## $ row_id : chr "1001_2022-11-01" "1003_2022-11-01" "1005_2022-11-01" "1007_2022-11-01" ...
## $ cfips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ first_day_of_month: chr "2022-11-01" "2022-11-01" "2022-11-01" "2022-11-01" ...
## $ year : int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ month : int 11 11 11 11 11 11 11 11 11 11 ...
## $ year_month : chr "2022-11" "2022-11" "2022-11" "2022-11" ...
# Line chart: mean microbusiness density across all counties, per year-month
p1 <- train_df %>%
  group_by(year_month) %>%
  summarise(mean_microbusiness_density = mean(microbusiness_density)) %>%
  ggplot(aes(x = year_month, y = mean_microbusiness_density)) +
  # year_month is a character axis, so a constant group is needed to
  # connect the points into one line
  geom_line(aes(group = 1)) +
  ggtitle("Overall Average Microbusiness Density") +
  xlab("Year-Month") +
  ylab("Average Microbusiness Density") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Line chart: mean number of active microbusinesses, per year-month
p2 <- train_df %>%
  group_by(year_month) %>%
  summarise(avg_active = mean(active)) %>%
  ggplot(aes(x = year_month, y = avg_active, group = 1)) +
  geom_line() +
  ggtitle("Overall Active Microbusiness Count") +
  xlab("Year-Month") +
  ylab("Active") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Stack the two charts vertically
grid.arrange(grobs = list(p1, p2), nrow = 2)
# Mean microbusiness density per calendar year
train_df_mean_year <- train_df %>%
  group_by(year) %>%
  summarise(avg_microbusiness_density = mean(microbusiness_density))
# Mean microbusiness density per calendar month (pooled over all years)
train_df_mean_month <- train_df %>%
  group_by(month) %>%
  summarise(avg_microbusiness_density = mean(microbusiness_density))
# Rotated x-axis labels, shared by both charts
rotated_x_labels <- theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Monthly means
p1 <- train_df_mean_month %>%
  ggplot(aes(x = month, y = avg_microbusiness_density)) +
  geom_line() +
  labs(title = "Avg Monthly Microbusiness Density",
       x = "Month",
       y = "Average Microbusiness Density") +
  rotated_x_labels
# Yearly means
p2 <- train_df_mean_year %>%
  ggplot(aes(x = year, y = avg_microbusiness_density)) +
  geom_line() +
  labs(title = "Avg Yearly Microbusiness Density",
       x = "Year",
       y = "Average Microbusiness Density") +
  rotated_x_labels
# Place the two charts next to each other
grid.arrange(grobs = list(p1, p2), ncol = 2)
The Bureau of Economic Analysis (BEA) divides the United States into eight distinct economic regions. These regions are based on similarities in economic characteristics such as industry composition, income levels, and employment patterns. The eight regions are:
New England: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, and Vermont.
The economy in this region is largely based on manufacturing, healthcare, education, and finance.
Mideast: Delaware, Maryland, New Jersey, New York, Pennsylvania, and the District of Columbia.
The region has a diverse economy, with a mix of manufacturing, finance, healthcare, and professional services.
Great Lakes: Illinois, Indiana, Michigan, Ohio, and Wisconsin.
The region has a strong manufacturing base, particularly in the automotive industry, and also has a significant healthcare sector.
Plains: Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, and South Dakota.
Agriculture and energy production are major industries in this region, along with manufacturing and healthcare.
Southeast: Alabama, Arkansas, Florida, Georgia, Kentucky, Louisiana, Mississippi, North Carolina, South Carolina, Tennessee, Virginia, and West Virginia.
The Southeast has a diverse economy, with significant industries in healthcare, finance, and manufacturing, as well as tourism and agriculture.
Southwest: Arizona, New Mexico, Oklahoma, and Texas.
The region has a strong energy sector, particularly in oil and gas production, and also has significant industries in manufacturing, healthcare, and finance.
Rocky Mountain: Colorado, Idaho, Montana, Utah, and Wyoming.
The region is known for its natural resources, particularly in mining and energy production, as well as tourism, healthcare, and manufacturing.
Far West: Alaska, California, Hawaii, Nevada, Oregon, and Washington.
This region has a diverse economy, with significant industries in technology, finance, healthcare, and manufacturing, as well as tourism and agriculture.
# State outline polygons; the region column holds lowercase state names
us_map <- map_data("state")
# Lookup table pairing each state abbreviation with its lowercase name
state_names <- data.frame(state = state.abb, name = tolower(state.name))
# Attach the abbreviation, then overwrite region (the state name) with the
# BEA region the state belongs to; states in none of the lists become NA
region_map <- us_map %>%
  left_join(state_names, by = c("region" = "name")) %>%
  mutate(region = case_when(
    region %in% c("connecticut", "maine", "massachusetts", "new hampshire",
                  "rhode island", "vermont") ~ "New England",
    region %in% c("delaware", "maryland", "new jersey", "new york",
                  "pennsylvania", "district of columbia") ~ "Mideast",
    region %in% c("illinois", "indiana", "michigan", "ohio",
                  "wisconsin") ~ "Great Lakes",
    region %in% c("iowa", "kansas", "minnesota", "missouri", "nebraska",
                  "north dakota", "south dakota") ~ "Plains",
    region %in% c("alabama", "arkansas", "florida", "georgia", "kentucky",
                  "louisiana", "mississippi", "north carolina",
                  "south carolina", "tennessee", "virginia",
                  "west virginia") ~ "Southeast",
    region %in% c("arizona", "new mexico", "oklahoma",
                  "texas") ~ "Southwest",
    region %in% c("colorado", "idaho", "montana", "utah",
                  "wyoming") ~ "Rocky Mountain",
    region %in% c("alaska", "california", "hawaii", "nevada", "oregon",
                  "washington") ~ "Far West",
    TRUE ~ NA_character_
  ))
# Summarize the data to get the center coordinates of each state
#state_centers <- region_map %>%
# group_by(state) %>%
# summarise(long = mean(long), lat = mean(lat))
# Label positions: one row per state, placed at the midpoint of the state's
# longitude/latitude bounding box
states <- aggregate(cbind(long, lat) ~ region, data = us_map,
                    FUN = function(x) mean(range(x)))
# Two-letter label for each row, looked up by state name. aggregate() returns
# rows sorted by state name, which is not the same order as the sorted
# abbreviations (e.g. "arizona" comes before "arkansas" but "AR" comes before
# "AZ"), so the label must be matched by name rather than by position.
# state.abb/state.name cover the 50 states only; DC is added by hand.
state_abbreviations <- c(
  setNames(state.abb, tolower(state.name)),
  "district of columbia" = "DC"
)
states$group <- unname(state_abbreviations[as.character(states$region)])
# names(states)[names(states) == "region"] <- "group"
# Draw the BEA region map: state polygons filled by region, with the
# two-letter state labels from `states` drawn on top
ggplot(region_map) +
  geom_polygon(aes(x = long, y = lat, group = group, fill = region),
               color = "black", show.legend = TRUE) +
  # The label layer uses its own data and must not inherit any mapping
  geom_text(data = states,
            mapping = aes(x = long, y = lat, label = group),
            inherit.aes = FALSE,
            size = 2.5, color = "white", fontface = "bold") +
  theme_void() +
  theme(panel.background = element_rect(fill = "gray75", color = NA)) +
  labs(title = "Bureau of Economic Analysis Regional Divisions Map",
       fill = "Region")
# Inspect the label table (state name, label coordinates, two-letter label)
str(states)
## 'data.frame': 49 obs. of 4 variables:
## $ region: chr "alabama" "arizona" "arkansas" "california" ...
## $ long : num -86.7 -111.9 -92.1 -119.3 -105.6 ...
## $ lat : num 32.6 34.2 34.8 37.3 39 ...
## $ group : chr "AL" "AR" "AZ" "CA" ...
First, we need to convert the state and county columns in
train_df to lowercase, because the names returned by
map_data() are lowercase and mismatched case would prevent
rows from matching when the two dataframes are merged.
# Lowercase the state and county names in train_df in one pass
train_df <- train_df %>%
  mutate(across(c(state, county), tolower))
Then, we’ll create a new column named region and assign region values based on state column:
# Add a region column holding the BEA region of each row's (lowercase) state.
# The mapping is evaluated on the whole column at once instead of row by row;
# states that appear in none of the lists are labelled "other".
train_df <- train_df %>%
  mutate(region = case_when(
    state %in% c("connecticut", "maine", "massachusetts", "new hampshire",
                 "rhode island", "vermont") ~ "new england",
    state %in% c("delaware", "maryland", "new jersey", "new york",
                 "pennsylvania", "district of columbia") ~ "mideast",
    state %in% c("illinois", "indiana", "michigan", "ohio",
                 "wisconsin") ~ "great lakes",
    state %in% c("iowa", "kansas", "minnesota", "missouri", "nebraska",
                 "north dakota", "south dakota") ~ "plains",
    state %in% c("alabama", "arkansas", "florida", "georgia", "kentucky",
                 "louisiana", "mississippi", "north carolina",
                 "south carolina", "tennessee", "virginia",
                 "west virginia") ~ "southeast",
    state %in% c("arizona", "new mexico", "oklahoma", "texas") ~ "southwest",
    state %in% c("colorado", "idaho", "montana", "utah",
                 "wyoming") ~ "rocky mountain",
    state %in% c("alaska", "california", "hawaii", "nevada", "oregon",
                 "washington") ~ "far west",
    TRUE ~ "other"
  ))
# Print all the unique values in the region column
unique(train_df$region)
## [1] "southeast" "far west" "southwest" "rocky mountain"
## [5] "new england" "mideast" "great lakes" "plains"
# Bar chart of the mean microbusiness density in each BEA region
train_df %>%
  group_by(region) %>%
  summarise(avg_density = mean(microbusiness_density)) %>%
  ggplot(aes(x = region, y = avg_density)) +
  # geom_col() draws bar heights directly from the y values
  geom_col() +
  ggtitle("Average Microbusiness Density Per Region") +
  xlab("Region") +
  ylab("Avg Density") +
  # Black-and-white theme
  theme_bw()
# Mean microbusiness density for each BEA region
avg_density <- train_df %>%
  group_by(region) %>%
  summarise(avg_density = mean(microbusiness_density))
# Lookup table pairing each state abbreviation with its lowercase name
state_names <- data.frame(state = state.abb, name = tolower(state.name))
# Lowercase the region names in region_map so they can be matched against
# the lowercase region names in avg_density
region_map$region <- tolower(region_map$region)
# Attach the regional average to every polygon vertex, then restore the
# vertex drawing sequence, which merge() does not preserve
plot_data <- arrange(
  merge(x = region_map, y = avg_density, by = "region"),
  order
)
# Hand-picked longitude/latitude at which each region's label is drawn
bea_regions <- data.frame(
  group = c("New England", "Mideast", "Great Lakes", "Plains",
            "Southeast", "Southwest", "Rocky Mountain", "Far West"),
  x = c(-71.8, -76.9, -86.6, -98.5, -82.4, -106.4, -111.1, -119.8),
  y = c(42.2, 39, 43.4, 39.8, 32.6, 34.3, 44.4, 38.4)
)
# Choropleth of the regional averages with a name label on each region
ggplot(plot_data, aes(x = long, y = lat, group = group, fill = avg_density)) +
  geom_polygon(color = "black") +
  geom_label(data = bea_regions,
             mapping = aes(x = x, y = y, label = group),
             fill = "gray75", color = "black",
             size = 3, fontface = "bold",
             label.size = 0.2,
             label.padding = unit(0.2, "lines")) +
  scale_fill_gradient(low = "white", high = "darkred") +
  theme_void() +
  labs(title = "Average Microbusiness Density Per Region",
       fill = "Avg Density")
# Mean microbusiness density per (lowercase) state name
state_avg <- aggregate(microbusiness_density ~ state, data = train_df,
                       FUN = mean)
# State outline polygons; the region column holds lowercase state names
us_map <- map_data("state")
# Attach each state's average to its polygon vertices. merge() reorders rows
# by the join key, so the vertices are re-sorted by `order` to keep each
# polygon's outline in drawing sequence.
# NOTE: this data frame has the same name as ggplot2's map_data() function.
# Calls such as map_data("county") still reach the function, because R skips
# non-function objects when resolving a call.
map_data <- merge(us_map, state_avg, by.x = "region", by.y = "state") %>%
  arrange(order)
# Heatmap of average microbusiness density by state
ggplot(map_data,
       aes(x = long, y = lat, group = group, fill = microbusiness_density)) +
  geom_polygon() +
  scale_fill_gradient(low = "white", high = "navyblue") +
  coord_map() +
  labs(title = "Average Microbusiness Density per State", fill = "Density") +
  theme_void() +
  theme(panel.background = element_rect(fill = "lightblue", color = NA))
#{r fig.width = 10 ,fig.height = 12, out.width='100%', fig.align='center'}
# Mean microbusiness density per (lowercase) county name within each state
county_avg <- aggregate(microbusiness_density ~ county + state,
                        data = train_df, FUN = mean)
# Strip one trailing " county", " city" or " parish" so the names can be
# matched against the subregion names in the county map data. The pattern is
# anchored to the end of the string and applied once, so a name such as
# "james city county" becomes "james city" rather than "james".
county_avg$county <- sub(" (county|city|parish)$", "", county_avg$county)
# County outline polygons: region = state name, subregion = county name
us_map <- map_data("county")
# Attach each county's average to its polygon vertices, then restore the
# vertex drawing sequence, which merge() does not preserve
map_data <- merge(us_map, county_avg,
                  by.x = c("subregion", "region"),
                  by.y = c("county", "state")) %>%
  arrange(order)
# Heatmap of average microbusiness density by county
ggplot(map_data,
       aes(x = long, y = lat, group = group, fill = microbusiness_density)) +
  geom_polygon() +
  scale_fill_gradient(low = "lightblue", high = "navyblue") +
  coord_map() +
  labs(title = "Average Microbusiness Density per County", fill = "Density") +
  theme_void() +
  theme(panel.background = element_rect(fill = "gray85", color = NA))
# Print a compact summary of merged_df: dimensions, column types and leading values
str(merged_df)
## 'data.frame': 122265 obs. of 15 variables:
## $ row_id : chr "1001_2019-08-01" "1001_2019-09-01" "1001_2019-10-01" "1001_2019-11-01" ...
## $ cfips : int 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ county : chr "Autauga County" "Autauga County" "Autauga County" "Autauga County" ...
## $ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ first_day_of_month : chr "2019-08-01" "2019-09-01" "2019-10-01" "2019-11-01" ...
## $ microbusiness_density: num 3.01 2.88 3.06 2.99 2.99 ...
## $ active : int 1249 1198 1269 1243 1243 1242 1217 1227 1255 1257 ...
## $ year_month : chr "2019-08" "2019-09" "2019-10" "2019-11" ...
## $ year : num 2019 2019 2019 2019 2019 ...
## $ month : int 8 9 10 11 12 1 2 3 4 5 ...
## $ pct_bb : num 76.6 76.6 76.6 76.6 76.6 78.9 78.9 78.9 78.9 78.9 ...
## $ pct_college : num 14.5 14.5 14.5 14.5 14.5 15.9 15.9 15.9 15.9 15.9 ...
## $ pct_foreign_born : num 2.1 2.1 2.1 2.1 2.1 2 2 2 2 2 ...
## $ pct_it_workers : num 1.3 1.3 1.3 1.3 1.3 1.1 1.1 1.1 1.1 1.1 ...
## $ median_hh_inc : num 55317 55317 55317 55317 55317 ...
## - attr(*, "na.action")= 'omit' Named int [1:28551] 40 41 42 43 44 45 46 47 48 88 ...
## ..- attr(*, "names")= chr [1:28551] "40" "41" "42" "43" ...
A boxplot is a visualization tool that summarizes the central tendency and spread of a dataset and helps to identify outliers and skewness. Boxplots are useful for detecting anomalies and for comparing the distributions of several variables, which makes them a valuable part of exploratory data analysis.
# Boxplot of the target variable on its own page
boxplot(merged_df$microbusiness_density, col = "blue", main = "Microbusiness Density")
# Draw the remaining six boxplots on a 3-row x 2-column grid; par() returns the
# previous settings so the layout can be restored afterwards
old_par <- par(mfrow = c(3, 2))
# The fill colour argument of boxplot() is `col`; it is used for every panel
# so that all six boxes are drawn in the same pink
boxplot(merged_df$median_hh_inc, col = "pink", main = "Median Household Income")
boxplot(merged_df$pct_college, col = "pink", main = "Percentage with College Education")
boxplot(merged_df$pct_foreign_born, col = "pink", main = "Percentage of Foreign-born Residents")
boxplot(merged_df$pct_it_workers, col = "pink", main = "Percentage of IT Workers")
boxplot(merged_df$pct_bb, col = "pink", main = "Percentage of Broadband Access")
boxplot(merged_df$active, col = "pink", main = "Active Microbusiness Count")
# Restore the graphics layout that was active before this block
par(old_par)
Outlier detection is an important step in data analysis, as outliers can significantly affect the results of statistical analyses. One method for outlier detection is the decision range approach.
The decision range approach involves setting a range of values outside of which any observations are considered outliers. The decision range is determined based on the data distribution and the researcher’s judgment. One common approach is to use the interquartile range (IQR) to define the decision range. The IQR is calculated as the difference between the third quartile (Q3) and the first quartile (Q1) of the data.
The decision range is then defined as the interval from Q1 − 1.5 × IQR to Q3 + 1.5 × IQR. Any observation that falls outside this interval is considered an outlier. This method is useful for identifying potential outliers in a dataset and can help to ensure that statistical analyses are robust and accurate.
# Minimum, quartiles and maximum of microbusiness density (quantile type 7)
quartiles <- quantile(
  merged_df[["microbusiness_density"]],
  probs = c(0, 0.25, 0.5, 0.75, 1),
  na.rm = FALSE,
  names = TRUE,
  type = 7,
  digits = 6
)
print(quartiles)
## 0% 25% 50% 75% 100%
## 0.000000 1.639344 2.586543 4.519231 284.340030
# Mean and standard deviation of 'microbusiness_density'; they parameterise
# the reference normal curve drawn over the histogram
mean_density <- mean(merged_df$microbusiness_density)
sd_density <- sd(merged_df$microbusiness_density)
# 1000 evenly spaced x values covering mean +/- 3 standard deviations
x_values <- seq(mean_density - 3 * sd_density, mean_density + 3 * sd_density, length.out = 1000)
# Normal density at those x values, using the mean and sd computed above
y_values <- dnorm(x_values, mean = mean_density, sd = sd_density)
# Curve coordinates as a data frame for geom_line()
density_df <- data.frame(x = x_values, y = y_values)
# NOTE: `boxplot` and `qqplot` below share their names with base/stats
# functions; the names are kept because they are top-level objects of the script.
# Boxplot of the observed values. The column is referenced by name inside
# aes() (not via merged_df$...), so ggplot2 evaluates it within `data`.
boxplot <- ggplot(data = merged_df, aes(x = "", y = microbusiness_density)) +
  geom_boxplot(fill = "skyblue") +
  labs(x = "", y = "Microbusiness Density") +
  ggtitle("Boxplot for Microbusiness Density")
# Q-Q plot of the OBSERVED densities against the normal distribution.
# The sample must be the data itself (not the synthetic x grid of the bell
# curve), and geom_qq_line() draws the matching reference line; a fixed
# y = x line would only be valid for data already on the standard-normal scale.
qqplot <- ggplot(data = merged_df, aes(sample = microbusiness_density)) +
  geom_qq() +
  geom_qq_line(colour = "red") +
  labs(x = "Theoretical Normal Quantiles", y = "Observed Quantiles") +
  ggtitle("Q-Q Plot for Microbusiness Density")
# Histogram on the density scale with the reference normal curve on top.
# after_stat(density) and linewidth are the current ggplot2 (>= 3.4.0)
# spellings of the deprecated ..density.. and size.
densityplot <- ggplot(data = merged_df, aes(x = microbusiness_density)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, colour = "black", fill = "white") +
  geom_line(data = density_df, aes(x = x, y = y), colour = "red", linewidth = 1) +
  labs(x = "Microbusiness Density", y = "Density") +
  ggtitle("Distribution of Microbusiness Density")
# Arrange the three plots in one row; the boxplot gets half the width of the others
grid.arrange(densityplot, boxplot, qqplot, ncol = 3, widths = c(2, 1, 2))
# Base-graphics layout: 1 row, 3 columns.
# NOTE: par(mfrow) only affects base graphics; each ggplot2 figure below is
# drawn on its own page regardless of this setting.
par(mfrow = c(1, 3))
# Mean / sd of the observed densities and the matching normal curve
# evaluated on 1000 points covering mean +/- 3 standard deviations
mean_density <- mean(merged_df$microbusiness_density)
sd_density <- sd(merged_df$microbusiness_density)
x_values <- seq(mean_density - 3 * sd_density, mean_density + 3 * sd_density, length.out = 1000)
y_values <- dnorm(x_values, mean = mean_density, sd = sd_density)
density_df <- data.frame(x = x_values, y = y_values)
# Histogram on the density scale with the reference normal curve on top
# (after_stat(density) is the current spelling of the deprecated ..density..)
ggplot(merged_df, aes(x = microbusiness_density)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, colour = "black", fill = "lightblue") +
  geom_line(data = density_df, aes(x = x, y = y), colour = "navy", linewidth = 1) +
  labs(x = "Microbusiness Density", y = "Density") +
  ggtitle("Distribution of Microbusiness Density") +
  theme_minimal()
# Manual Q-Q plot: sorted observations against normal quantiles with the
# sample mean and sd. The probabilities come from ppoints(), which stay
# strictly inside (0, 1); probabilities of exactly 0 and 1 would give
# -Inf / Inf quantiles, and the smallest and largest observations would then
# fall outside the plot.
theoretical_norm <- qnorm(
  p = ppoints(length(merged_df$microbusiness_density)),
  mean = mean_density,
  sd = sd_density
)
# density_df is reused here with different columns (observed / theoretical)
density_df <- data.frame(observed = sort(merged_df$microbusiness_density), theoretical = theoretical_norm)
# Both axes are on the data scale, so y = x is the reference line
ggplot(density_df, aes(x = theoretical, y = observed)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, colour = "red") +
  labs(x = "Theoretical Normal Quantiles", y = "Observed Quantiles") +
  ggtitle("Q-Q Plot for Microbusiness Density")
# Boxplot of the observed densities
ggplot(data = merged_df, aes(x = "", y = microbusiness_density)) +
  geom_boxplot(fill = "skyblue") +
  labs(x = "", y = "Microbusiness Density") +
  ggtitle("Boxplot for Microbusiness Density")
# Pairwise correlations between all columns of imputed_data (cor() defaults)
corr_matrix <- cor(x = imputed_data)
# Show the upper triangle of the matrix, one circle per coefficient
corrplot(corr = corr_matrix, method = "circle", type = "upper")
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Correlation matrix of all columns of imputed_data
cor_mat <- cor(imputed_data)
# Reshape the matrix to long format: one row per (Var1, Var2, value) cell
melted_cor <- melt(cor_mat)
# Tile heatmap of the coefficients on a diverging blue-white-red scale
# centred at 0. `limits` is spelled out in full rather than relying on
# partial argument matching, and fixes the colour range to [-1, 1].
ggplot(melted_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Pearson\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    # Rotate the x labels so the long column names do not overlap
    axis.text.x = element_text(angle = 45, vjust = 1, size = 8, hjust = 1),
    axis.text.y = element_text(size = 8),
    axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Place a horizontal legend inside the plotting area
    legend.justification = c(1, 0),
    legend.position = c(0.8, 0.2),
    legend.direction = "horizontal"
  )
# Print a compact summary of imputed_data: dimensions, column types and leading values
str(imputed_data)
## 'data.frame': 3142 obs. of 26 variables:
## $ pct_bb_2017 : num 76.6 74.5 57.2 62 65.8 49.4 58.2 71 62.8 67.5 ...
## $ pct_bb_2018 : num 78.9 78.1 60.4 66.1 68.5 58.9 62.1 73 66.5 68.6 ...
## $ pct_bb_2019 : num 80.6 81.8 60.5 69.2 73 60.1 64.6 75.1 69.4 70.7 ...
## $ pct_bb_2020 : num 82.7 85.1 64.6 76.1 79.6 60.6 73.6 79.8 74.5 75 ...
## $ pct_bb_2021 : num 85.5 87.9 64.6 74.6 81 59.4 76.3 81.6 77.1 76.7 ...
## $ cfips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ pct_college_2017 : num 14.5 20.4 7.6 8.1 8.7 6.6 9.6 10.2 9 6.6 ...
## $ pct_college_2018 : num 15.9 20.7 7.8 7.6 8.1 7.4 9.7 10.2 9.3 6.8 ...
## $ pct_college_2019 : num 16.1 21 7.6 6.5 8.6 7.4 9.7 10.5 9.5 6.6 ...
## $ pct_college_2020 : num 16.7 20.2 7.3 7.4 8.9 6.1 10.1 10.5 10.5 6.3 ...
## $ pct_college_2021 : num 16.4 20.6 6.7 7.9 9.3 8.1 8.1 11.4 9.6 6.2 ...
## $ pct_foreign_born_2017: num 2.1 3.2 2.7 1 4.5 1.8 1 2.6 1.3 0.7 ...
## $ pct_foreign_born_2018: num 2 3.4 2.5 1.4 4.4 0.9 1.4 2.7 1.4 0.8 ...
## $ pct_foreign_born_2019: num 2.3 3.7 2.7 1.5 4.5 0.7 0.8 2.7 1.8 0.9 ...
## $ pct_foreign_born_2020: num 2.3 3.4 2.6 1.6 4.4 1.5 1.9 2.5 1.9 1.9 ...
## $ pct_foreign_born_2021: num 2.1 3.5 2.6 1.1 4.5 1.2 1.7 2.5 2 2 ...
## $ pct_it_workers_2017 : num 1.3 1.4 0.5 1.2 1.3 0.4 1.1 1.4 2.4 1.4 ...
## $ pct_it_workers_2018 : num 1.1 1.3 0.3 1.4 1.4 0.3 1.4 1.4 2.1 1.3 ...
## $ pct_it_workers_2019 : num 0.7 1.4 0.8 1.6 0.9 0.5 1.7 1.2 2.1 1.2 ...
## $ pct_it_workers_2020 : num 0.6 1 1.1 1.7 1.1 0.3 1.3 1 2.3 0.9 ...
## $ pct_it_workers_2021 : num 1.1 1.3 0.8 2.1 0.9 0.2 1.4 1 1.8 0.4 ...
## $ median_hh_inc_2017 : int 55317 52562 33368 43404 47412 29655 36326 43686 37342 40041 ...
## $ median_hh_inc_2018 : num 58786 55962 34186 45340 48695 ...
## $ median_hh_inc_2019 : int 58731 58320 32525 47542 49358 37785 40688 47255 42289 41919 ...
## $ median_hh_inc_2020 : num 57982 61756 34990 51721 48922 ...
## $ median_hh_inc_2021 : num 62660 64346 36422 54277 52830 ...
# Bar chart: one bar per county FIPS code, bar height is the 2017 median
# household income
ggplot(
  data = imputed_data,
  mapping = aes(x = cfips, y = median_hh_inc_2017)
) +
  geom_col(fill = "steelblue") +
  labs(
    x = "County FIPS Code",
    y = "Median Household Income",
    title = "Median Household Income in 2017"
  )
# Pie chart: a single stacked bar of the 2017 foreign-born percentages,
# coloured by county FIPS code and wrapped into polar coordinates
ggplot(
  data = imputed_data,
  mapping = aes(x = "", y = pct_foreign_born_2017, fill = cfips)
) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(
    fill = "County FIPS Code",
    title = "Percentage of Foreign-born Population in 2017"
  )
# Boxplot of microbusiness_density over all rows of merged_df.
# No fill aesthetic is mapped, so no manual fill scale is attached
# (a fill scale without a mapped fill has no effect on the plot).
ggplot(merged_df, aes(x = "", y = microbusiness_density)) +
  geom_boxplot()
# Outlier detection with a decision range:
# a value outside [lower bound, upper bound] is treated as an outlier.
# Only the upper bound is used here, since no outliers are visible on the
# low side of the boxplots.
# Upper bound: Q3 + 1.5 * IQR
# First (25%) and third (75%) quartiles of the training densities
quartiles <- quantile(
  train_df[["microbusiness_density"]],
  probs = c(0.25, 0.75),
  na.rm = FALSE
)
# quartiles
# Interquartile range (Q3 - Q1)
iqr <- IQR(train_df[["microbusiness_density"]])
# Upper bound; it keeps the "75%" name of the quartile it is derived from
second_interval_ub <- quartiles[2] + 1.5 * iqr
print(second_interval_ub)
## 75%
## 8.839061
# Flag every row above the upper bound: 1 = outlier, 0 = within range
train_df$microbusiness_density_label <- as.numeric(
  train_df[["microbusiness_density"]] > second_interval_ub
)
# Light-blue boxplot of microbusiness density with the minimal theme;
# title and axis labels are left at their defaults
ggplot(
  data = merged_df,
  mapping = aes(y = microbusiness_density, x = "")
) +
  # Optional labels, currently disabled:
  #labs(title = "doesn;t matter",
  # x = NULL, y = NULL) +
  theme_minimal() +
  geom_boxplot(fill = "lightblue")
library(ggplot2)
# Boxplot of the pct_bb_2021 column across all rows of imputed_data
ggplot(
  data = imputed_data,
  mapping = aes(y = pct_bb_2021, x = "")
) +
  geom_boxplot(fill = "lightblue") +
  theme_minimal() +
  labs(
    x = NULL,
    y = "Percentage",
    title = "Percentage of Population with Broadband Access in 2021"
  )
# Boxplot of the pct_bb_2017 column: blue outline, light-blue fill
ggplot(
  data = imputed_data,
  mapping = aes(y = pct_bb_2017, x = "")
) +
  geom_boxplot(colour = "blue", fill = "lightblue") +
  theme_bw() +
  labs(y = "Percentage", title = "Percentage of Broadband Access in 2017")
# Scatter plot: 2017 median household income (x) against the 2017
# percentage of IT workers (y), one point per row of imputed_data
ggplot(
  data = imputed_data,
  mapping = aes(y = pct_it_workers_2017, x = median_hh_inc_2017)
) +
  geom_point() +
  labs(
    x = "Median Household Income",
    y = "Percentage of IT Workers",
    title = "Percentage of IT Workers and Median Household Income in 2017"
  )
# Heatmap of broadband coverage.
# Reshape the five pct_bb_<year> columns (2017-2021) to long format:
# one row per (cfips, year) with the value stored in `pct_bb`
imputed_data_long <- melt(
  imputed_data,
  id.vars = "cfips",
  measure.vars = paste0("pct_bb_", 2017:2021),
  variable.name = "year",
  value.name = "pct_bb"
)
# One tile per county and year, shaded from white to steel blue by pct_bb
ggplot(
  data = imputed_data_long,
  mapping = aes(x = cfips, y = year, fill = pct_bb)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(
    x = "County FIPS Code",
    y = "Year",
    title = "Percentage of Broadband Coverage"
  )
# State outlines (one row per polygon vertex; `region` holds the lowercase state name)
us.map <- map_data("state")
# PADD zone membership: zone label -> lowercase state names
padd_states <- list(
  "PADD 1: East Coast" = c(
    "maine", "vermont", "new hampshire", "massachusetts", "connecticut", "rhode island",
    "new york", "pennsylvania", "new jersey", "delaware", "district of columbia", "maryland",
    "west virginia", "virginia", "north carolina", "south carolina", "georgia", "florida"
  ),
  "PADD 2: Midwest" = c(
    "south dakota", "north dakota", "nebraska", "kansas", "oklahoma",
    "minnesota", "iowa", "missouri", "wisconsin", "illinois", "indiana",
    "michigan", "ohio", "kentucky", "tennessee"
  ),
  "PADD 3: Gulf Coast" = c(
    "new mexico", "texas", "arkansas", "louisiana", "alabama", "mississippi"
  ),
  "PADD 4: Rocky Mountain" = c(
    "montana", "idaho", "wyoming", "utah", "colorado"
  ),
  "PADD 5: West Coast" = c(
    "washington", "oregon", "nevada", "arizona", "california"
  )
)
# Invert the list into a named vector (state name -> zone) and look up every
# map row at once; a state that is in no zone gets NA
padd_lookup <- setNames(
  rep(names(padd_states), lengths(padd_states)),
  unlist(padd_states, use.names = FALSE)
)
us.map$PADD <- unname(padd_lookup[us.map$region])
# Shift each zone by a fixed offset (in degrees) so the zones are drawn
# apart from each other
padd_shift <- data.frame(
  PADD = names(padd_states),
  long = c(5, 0, 0, -5, -10),
  lat = c(0, 0, -3, 0, -2)
)
shift_row <- match(us.map$PADD, padd_shift$PADD)
us.map$lat.transp <- us.map$lat + padd_shift$lat[shift_row]
us.map$long.transp <- us.map$long + padd_shift$long[shift_row]
# Label position per state: midpoint of the shifted longitude / latitude range
states <- aggregate(cbind(long.transp, lat.transp) ~ region,
  data = us.map,
  FUN = function(x) mean(range(x))
)
# Derive the two-letter labels from the state name itself. aggregate() returns
# the states sorted by full name, which is NOT the alphabetical order of the
# abbreviations (e.g. "arizona" sorts before "arkansas" but "AR" before "AZ"),
# so a positional label vector would mislabel states. state.abb has no entry
# for the District of Columbia; it is filled in explicitly.
states$labels <- state.abb[match(states$region, tolower(state.name))]
states$labels[states$region == "district of columbia"] <- "DC"
# Plot the shifted polygons filled by PADD zone, with the state labels on top.
# (A `colour` argument passed to ggplot() itself is ignored, so none is given.)
ggplot(us.map, aes(x = long.transp, y = lat.transp)) +
  geom_polygon(aes(group = group, fill = PADD)) +
  geom_text(data = states, aes(long.transp, lat.transp, label = labels), size = 3) +
  theme(
    panel.background = element_blank(), # remove background
    panel.grid = element_blank(),
    axis.line = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank()
  ) +
  coord_equal()
# Assign every row of us_map to a named multi-state region, stored in the
# `subregion` column, based on the state name in `region`.
# NOTE(review): if us_map still holds the county-level map loaded earlier,
# its existing `subregion` column (county names) is overwritten here -- confirm
# that this is intended.
# Region label -> lowercase state names
subregion_states <- list(
  "new england" = c(
    "connecticut", "maine", "massachusetts", "new hampshire", "rhode island", "vermont"
  ),
  "mideast" = c(
    "delaware", "maryland", "new jersey", "new york", "pennsylvania", "district of columbia"
  ),
  "great lakes" = c(
    "illinois", "indiana", "michigan", "ohio", "wisconsin"
  ),
  "plains" = c(
    "iowa", "kansas", "minnesota", "missouri", "nebraska", "north dakota", "south dakota"
  ),
  "southeast" = c(
    "alabama", "arkansas", "florida", "georgia", "kentucky", "louisiana", "mississippi",
    "north carolina", "south carolina", "tennessee", "virginia", "west virginia"
  ),
  "southwest" = c(
    "arizona", "new mexico", "oklahoma", "texas"
  ),
  # Lowercase like the other seven labels, so that the value matches
  # lowercase join keys.
  # NOTE(review): this label was previously spelled "rocky Mountain"; check
  # any code that compares against that exact spelling.
  "rocky mountain" = c(
    "colorado", "idaho", "montana", "utah", "wyoming"
  ),
  "far west" = c(
    "alaska", "california", "hawaii", "nevada", "oregon", "washington"
  )
)
# Invert the list into a named vector (state name -> region label) and look
# up all rows in one vectorised step instead of looping over the rows; this
# also works for a data frame with zero rows
subregion_lookup <- setNames(
  rep(names(subregion_states), lengths(subregion_states)),
  unlist(subregion_states, use.names = FALSE)
)
subregion_values <- unname(subregion_lookup[us_map$region])
# States that are in none of the groups (or missing) fall back to "other"
subregion_values[is.na(subregion_values)] <- "other"
us_map$subregion <- subregion_values